import os
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
from sklearn.linear_model import LinearRegression
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf
from statsmodels.tsa.vector_ar.var_model import VAR
from statsmodels.tsa.stattools import adfuller
from datetime import datetime
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingRegressor
from tqdm.auto import tqdm
from statsmodels.tsa.stattools import adfuller
import statsmodels.graphics.tsaplots as spl
import statsmodels.graphics.gofplots as gpl
import scipy.stats as ss
import seaborn as sns
import matplotlib.pyplot as plt
import yfinance as yf
plt.style.use("bmh")
plt.rcParams['font.family'] = 'DejaVu Sans'
import warnings
warnings.filterwarnings("ignore")
from arch import arch_model
np.random.seed(42)
# File layout on disk: each asset class lives in its own folder.
bonds_folder = 'bonds'
shares_folder = 'shares'
currencies_folder = 'currencies'
risks_folder = 'risk_factors'

bonds_file_names = ['/SU26205RMFS3_rates.csv',
                    '/SU26209RMFS5_rates.csv',
                    '/SU26212RMFS9_rates.csv',
                    '/SU26215RMFS2_rates.csv',
                    '/SU26217RMFS8_rates.csv']
# Bond ticker = everything between the leading '/' and the '_rates' suffix.
bonds_names = [fname.split('_')[0][1:] for fname in bonds_file_names]

shares_file_names = ['/SBER.ME.csv', '/GAZP.ME.csv', '/YNDX.ME.csv',
                     '/MGNT.ME.csv', '/GMKN.ME.csv', '/MTSS.ME.csv',
                     '/PIKK.ME.csv', '/AFLT.ME.csv', '/POLY.ME.csv',
                     '/RSTI.ME.csv']
# Four-letter MOEX tickers (text before the first '.').
shares_names = [fname[1:].split('.')[0] for fname in shares_file_names]

currencies_file_names = ['/D_USD.csv', '/D_CNY.csv']
# Strip the '/D_' prefix and the '.csv' suffix.
currencies_names = [fname[3:-4] for fname in currencies_file_names]

risks_file_names = ['/IMOEX.ME.csv', '/BZ=F.csv', '/RTS.csv',
                    '/JPY.csv', '/EUR.csv', '/GBP.csv']
risks_names = [fname.split('/')[1].split('.csv')[0] for fname in risks_file_names]

# Full relative paths used by load_df().
bonds = [bonds_folder + fname for fname in bonds_file_names]
shares = [shares_folder + fname for fname in shares_file_names]
currencies = [currencies_folder + fname for fname in currencies_file_names]
risks = [risks_folder + fname for fname in risks_file_names]
zero_bonds = risks_folder + '/zero_bonds.csv'
metals = risks_folder + '/refined_precious_metals.xlsx'
def load_df(bonds=bonds, bonds_names=bonds_names,
            shares=shares, shares_names=shares_names,
            currencies=currencies, currencies_names=currencies_names,
            risks=risks, risks_names=risks_names,
            zero_bonds=zero_bonds, metals=metals):
    """Load every instrument's price series into one Date-indexed DataFrame.

    Bond CSVs use Finam-style '<DATE>'/'<CLOSE>' columns with %Y%m%d dates;
    shares, currencies and risk-factor CSVs use a 'Date' index with an
    'Adj Close' column; the metals Excel file and the zero-coupon curve
    contribute all of their columns. Gaps are forward-filled, then the
    leading rows are back-filled.

    Returns
    -------
    pd.DataFrame indexed by 'Date' (the first bond file's trading days).
    """
    # Skeleton frame: bond trading dates only, no columns yet.
    main_df = pd.read_csv(bonds[0], index_col='<DATE>')
    main_df.index = pd.to_datetime(main_df.index, format='%Y%m%d')
    main_df = main_df[[]]
    main_df.index.names = ['Date']
    # Bonds: close price per issue.
    for path, name in zip(bonds, bonds_names):
        df = pd.read_csv(path, index_col='<DATE>')
        df.index = pd.to_datetime(df.index, format='%Y%m%d')
        df = df[['<CLOSE>']].rename(columns={'<CLOSE>': name})
        df.index.names = ['Date']
        main_df = main_df.join(df, how='left', on='Date')
    # Shares, currencies and risk factors share the same CSV layout,
    # so the three original copy-pasted loops collapse into one.
    for paths, names in ((shares, shares_names),
                         (currencies, currencies_names),
                         (risks, risks_names)):
        for path, name in zip(paths, names):
            df = pd.read_csv(path, index_col='Date')
            df.index = pd.to_datetime(df.index)
            df = df[['Adj Close']].rename(columns={'Adj Close': name})
            main_df = main_df.join(df, how='left', on='Date')
    # Precious metals (Excel): keep every column.
    df = pd.read_excel(metals, index_col='Date')
    df.index = pd.to_datetime(df.index)
    main_df = main_df.join(df, how='left', on='Date')
    # Zero-coupon yield curve: keep every tenor column.
    df = pd.read_csv(zero_bonds, index_col='Date')
    df.index = pd.to_datetime(df.index)
    main_df = main_df.join(df, how='left', on='Date')
    # fillna(method=...) is deprecated in pandas >= 2.1 — use ffill()/bfill().
    main_df = main_df.ffill().bfill()
    return main_df
# Load the full price panel and split it by asset class. The column order
# fixed by load_df is: bonds (5) | shares (10) | currencies (2) | the rest.
prices = load_df()
bonds = prices.iloc[:, 0:5]  # NOTE: rebinds `bonds` from file paths to prices
shares = prices.iloc[:, 5:15]
currencies = prices.iloc[:, 15:17]
risk_factors = prices.iloc[:, 17:]
def plot_cor_matrix(risk_factors: pd.DataFrame) -> None:
    """Show an annotated heatmap of the pairwise correlations of `risk_factors`."""
    correlations = risk_factors.corr()
    _, ax = plt.subplots(figsize=(10, 10))
    sns.heatmap(correlations, annot=True, ax=ax)
    plt.show()
# Correlation structure of all risk-factor price series.
plot_cor_matrix(risk_factors)
def risk_factor_describe(risk_factor: pd.Series) -> None:
    """Print summary statistics, stationarity and normality tests, and a page
    of diagnostic plots for one risk-factor price series and its log-returns.

    Parameters
    ----------
    risk_factor : pd.Series of prices, indexed by date; `.name` is used as title.
    """
    risk_factor_name = risk_factor.name
    print(f"{'=' * 10} {risk_factor_name.upper()} {'=' * 10}")
    print(f"mean: {np.round(np.mean(risk_factor), 2)}")
    print(f"std: {np.round(np.std(risk_factor), 2)}")
    print(f"min: {np.round(np.min(risk_factor), 2)}")
    print(f"max: {np.round(np.max(risk_factor), 2)}")
    # ADF test on price levels: H0 = unit root (non-stationary).
    adfuller_pv = adfuller(risk_factor)[1]
    print(
        f"The Augmented Dickey-Fuller test, p-value: {adfuller_pv}, "
        f"series is {'non ' if adfuller_pv >= 0.05 else ''}stationary"
    )
    # NOTE(review): this is the log-return, though the printouts call it
    # "pct change"; the two are close for small daily moves.
    risk_factor_change = np.log(risk_factor).diff().iloc[1:]
    adfuller_pv = adfuller(risk_factor_change)[1]
    print(
        f"The Augmented Dickey-Fuller test for pct change, p-value: {adfuller_pv}, "
        f"series is {'non ' if adfuller_pv >= 0.05 else ''}stationary"
    )
    # Shapiro-Wilk normality test on the returns.
    _, norm_pv = ss.shapiro(risk_factor_change)
    print(
        f"The Shapiro–Wilk test, p-value: {norm_pv}, "
        f"series is {'' if norm_pv >= 0.05 else 'non '}normal"
    )
    # Diagnostics figure: rows 1-2 use a 5x1 grid (full-width line plots),
    # the remaining panels use positions 5-10 of a 5x2 grid.
    fig = plt.figure(figsize=(15, 15))
    ax = plt.subplot(511)
    sns.lineplot(x=risk_factor.index, y=risk_factor, ax=ax)
    plt.title('Price')
    ax = plt.subplot(512)
    sns.lineplot(x=risk_factor_change.index, y=risk_factor_change, ax=ax)
    plt.title('Price percent change')
    ax = plt.subplot(525)
    spl.plot_acf(risk_factor_change, lags=20, ax=ax)
    ax = plt.subplot(526)
    spl.plot_pacf(risk_factor_change, lags=20, ax=ax)
    # Q-Q / P-P / probability plots against a fitted normal distribution.
    ax = plt.subplot(527)
    normal_dist = gpl.ProbPlot(risk_factor_change, fit=True)
    normal_dist.qqplot(line='r', ax=ax)
    plt.title('Q-Q')
    ax = plt.subplot(528)
    normal_dist.ppplot(line='45', ax=ax)
    plt.title('P-P')
    ax = plt.subplot(529)
    normal_dist.probplot(line='r', ax=ax)
    plt.title('Prob')
    ax = plt.subplot(5, 2, 10)
    # NOTE(review): sns.distplot is deprecated (removed in newer seaborn);
    # sns.histplot is the modern replacement.
    sns.distplot(risk_factor_change, ax=ax, label='true', norm_hist=True, kde=False)
    plt.title('Distribution')
    # Overlay fitted normal and Student-t densities on a histogram of returns.
    params_norm = ss.norm.fit(risk_factor_change)
    params_t = ss.t.fit(risk_factor_change)
    vals, grid, _ = ax.hist(risk_factor_change, 50, density=True, color='blue')
    ax.plot(grid, ss.norm(*params_norm).pdf(grid), label='norm')
    ax.plot(grid, ss.t(*params_t).pdf(grid), label='Student')
    plt.legend()
    plt.tight_layout()
    plt.show()
# Run the descriptive report for every risk-factor column.
for col in risk_factors.columns:
    risk_factor_describe(risk_factors[col])
========== IMOEX.ME ========== mean: 2478.9 std: 360.04 min: 1817.82 max: 3289.02 The Augmented Dickey-Fuller test, p-value: 0.8341597619769606, series is non stationary The Augmented Dickey-Fuller test for pct change, p-value: 1.6222236112353364e-25, series is stationary The Shapiro–Wilk test, p-value: 3.8964872967045697e-28, series is non normal
========== BZ=F ========== mean: 58.54 std: 12.76 min: 19.33 max: 86.29 The Augmented Dickey-Fuller test, p-value: 0.29558777916016676, series is non stationary The Augmented Dickey-Fuller test for pct change, p-value: 4.899160511490788e-07, series is stationary The Shapiro–Wilk test, p-value: 1.8850017623074667e-35, series is non normal
========== RTS ========== mean: 1208.12 std: 133.8 min: 832.26 max: 1646.6 The Augmented Dickey-Fuller test, p-value: 0.2724882464435697, series is non stationary The Augmented Dickey-Fuller test for pct change, p-value: 3.303404341708835e-19, series is stationary The Shapiro–Wilk test, p-value: 1.7915367690328323e-37, series is non normal
========== JPY ========== mean: 0.56 std: 0.03 min: 0.5 max: 0.64 The Augmented Dickey-Fuller test, p-value: 0.21231121567861028, series is non stationary The Augmented Dickey-Fuller test for pct change, p-value: 1.205409046415594e-25, series is stationary The Shapiro–Wilk test, p-value: 1.2512403652192727e-23, series is non normal
========== EUR ========== mean: 70.47 std: 4.0 min: 59.57 max: 81.81 The Augmented Dickey-Fuller test, p-value: 0.24174034466135724, series is non stationary The Augmented Dickey-Fuller test for pct change, p-value: 0.0, series is stationary The Shapiro–Wilk test, p-value: 6.021887063064245e-25, series is non normal
========== GBP ========== mean: 80.67 std: 4.09 min: 69.6 max: 91.88 The Augmented Dickey-Fuller test, p-value: 0.15797404042581398, series is non stationary The Augmented Dickey-Fuller test for pct change, p-value: 0.0, series is stationary The Shapiro–Wilk test, p-value: 2.9390716711128583e-21, series is non normal
========== GOLD ========== mean: 2983.32 std: 746.72 min: 2232.16 max: 4887.7 The Augmented Dickey-Fuller test, p-value: 0.970707983245699, series is non stationary The Augmented Dickey-Fuller test for pct change, p-value: 5.603819012885486e-30, series is stationary The Shapiro–Wilk test, p-value: 3.6942589349393875e-29, series is non normal
========== ARGENT ========== mean: 36.29 std: 9.17 min: 29.51 max: 68.34 The Augmented Dickey-Fuller test, p-value: 0.943344377504336, series is non stationary The Augmented Dickey-Fuller test for pct change, p-value: 2.831574317955673e-11, series is stationary The Shapiro–Wilk test, p-value: 1.2357259466744523e-32, series is non normal
========== PLATINE ========== mean: 1848.12 std: 170.84 min: 1522.76 max: 2473.81 The Augmented Dickey-Fuller test, p-value: 0.950871410902307, series is non stationary The Augmented Dickey-Fuller test for pct change, p-value: 3.5592267710500554e-27, series is stationary The Shapiro–Wilk test, p-value: 2.853462578111193e-32, series is non normal
========== PALLADIUM ========== mean: 2999.15 std: 1393.67 min: 1379.06 max: 6098.86 The Augmented Dickey-Fuller test, p-value: 0.9819060821375514, series is non stationary The Augmented Dickey-Fuller test for pct change, p-value: 9.595576564741633e-25, series is stationary The Shapiro–Wilk test, p-value: 1.5966714053280207e-33, series is non normal
========== 0.25 ========== mean: 6.62 std: 1.46 min: 3.68 max: 9.82 The Augmented Dickey-Fuller test, p-value: 0.9145344034994012, series is non stationary The Augmented Dickey-Fuller test for pct change, p-value: 0.0, series is stationary The Shapiro–Wilk test, p-value: 1.6398283726809957e-31, series is non normal
========== 0.5 ========== mean: 6.64 std: 1.42 min: 3.87 max: 9.51 The Augmented Dickey-Fuller test, p-value: 0.9143124925299795, series is non stationary The Augmented Dickey-Fuller test for pct change, p-value: 0.0, series is stationary The Shapiro–Wilk test, p-value: 8.288271371567377e-32, series is non normal
========== 0.75 ========== mean: 6.66 std: 1.38 min: 4.02 max: 9.27 The Augmented Dickey-Fuller test, p-value: 0.9021490752383663, series is non stationary The Augmented Dickey-Fuller test for pct change, p-value: 0.0, series is stationary The Shapiro–Wilk test, p-value: 2.5358980037907518e-33, series is non normal
========== 1 ========== mean: 6.69 std: 1.35 min: 4.12 max: 9.1 The Augmented Dickey-Fuller test, p-value: 0.9242625911753577, series is non stationary The Augmented Dickey-Fuller test for pct change, p-value: 7.1724906631493e-22, series is stationary The Shapiro–Wilk test, p-value: 4.57275251980006e-35, series is non normal
========== 2 ========== mean: 6.8 std: 1.25 min: 4.38 max: 8.68 The Augmented Dickey-Fuller test, p-value: 0.9097458147543342, series is non stationary The Augmented Dickey-Fuller test for pct change, p-value: 2.6419301658138054e-21, series is stationary The Shapiro–Wilk test, p-value: 1.5035454679428952e-38, series is non normal
========== 3 ========== mean: 6.9 std: 1.18 min: 4.6 max: 8.65 The Augmented Dickey-Fuller test, p-value: 0.871143923257319, series is non stationary The Augmented Dickey-Fuller test for pct change, p-value: 1.596540747497765e-20, series is stationary The Shapiro–Wilk test, p-value: 1.4787309664509692e-37, series is non normal
========== 5 ========== mean: 7.09 std: 1.06 min: 4.95 max: 9.02 The Augmented Dickey-Fuller test, p-value: 0.7514487725858088, series is non stationary The Augmented Dickey-Fuller test for pct change, p-value: 2.5843196791238506e-14, series is stationary The Shapiro–Wilk test, p-value: 4.5505639159845875e-35, series is non normal
========== 7 ========== mean: 7.24 std: 0.97 min: 5.22 max: 9.19 The Augmented Dickey-Fuller test, p-value: 0.7010514948984194, series is non stationary The Augmented Dickey-Fuller test for pct change, p-value: 1.567642884904565e-17, series is stationary The Shapiro–Wilk test, p-value: 6.275568528051752e-34, series is non normal
========== 10 ========== mean: 7.43 std: 0.88 min: 5.59 max: 9.29 The Augmented Dickey-Fuller test, p-value: 0.6629083396090492, series is non stationary The Augmented Dickey-Fuller test for pct change, p-value: 1.1617756186882382e-17, series is stationary The Shapiro–Wilk test, p-value: 5.602457503896871e-34, series is non normal
========== 15 ========== mean: 7.66 std: 0.81 min: 5.97 max: 9.26 The Augmented Dickey-Fuller test, p-value: 0.6139933089641, series is non stationary The Augmented Dickey-Fuller test for pct change, p-value: 9.774613818203393e-18, series is stationary The Shapiro–Wilk test, p-value: 2.623877951946443e-32, series is non normal
========== 20 ========== mean: 7.84 std: 0.79 min: 6.17 max: 9.2 The Augmented Dickey-Fuller test, p-value: 0.601541555046818, series is non stationary The Augmented Dickey-Fuller test for pct change, p-value: 5.036880165630602e-18, series is stationary The Shapiro–Wilk test, p-value: 1.0485727086849075e-29, series is non normal
========== 30 ========== mean: 8.09 std: 0.85 min: 6.37 max: 9.57 The Augmented Dickey-Fuller test, p-value: 0.6419590589333192, series is non stationary The Augmented Dickey-Fuller test for pct change, p-value: 1.6271056205574663e-20, series is stationary The Shapiro–Wilk test, p-value: 4.461842885951126e-26, series is non normal
def get_log(df) -> pd.DataFrame:
    """Return one-period log returns of `df`, with the leading NaN row dropped."""
    log_returns = np.log(df).diff()
    return log_returns.dropna()
def gbm_sim_for_log_diff(PCA, T=10, N=1000):
    """Simulate Monte Carlo paths of i.i.d. normal log-diff shocks.

    Parameters
    ----------
    PCA : pd.Series of a (decorrelated) factor's log-differences.
    T : prediction horizon (number of simulated steps).
    N : number of simulated paths.

    Returns
    -------
    ndarray of shape (T + 1, N): row 0 repeats the last observed value,
    rows 1..T are draws from Normal(mean(PCA), std(PCA)).
    """
    start_row = np.full((1, N), fill_value=PCA.iloc[-1])
    shocks = np.random.normal(loc=PCA.mean(), scale=np.std(PCA), size=(T, N))
    return np.vstack((start_row, shocks))
def gbm_sim_for_log_diff_t_dist(PCA, T=10, N=1000):
    """Simulate Monte Carlo paths of i.i.d. Student-t log-diff shocks.

    A t distribution (df, loc, scale) is fitted to the series, capturing
    fatter tails than the normal variant `gbm_sim_for_log_diff`.

    Parameters
    ----------
    PCA : pd.Series of a (decorrelated) factor's log-differences.
    T : prediction horizon; defaults now match gbm_sim_for_log_diff.
    N : number of simulated paths; defaults now match gbm_sim_for_log_diff.

    Returns
    -------
    ndarray of shape (T + 1, N): row 0 repeats the last observed value,
    rows 1..T are draws from the fitted t distribution.
    """
    # In the original, this import preceded the string literal, so the
    # "docstring" was actually a no-op statement; fixed by documenting first.
    from scipy.stats import t
    df, loc, scale = t.fit(PCA)
    start_row = np.full((1, N), fill_value=PCA.iloc[-1])
    return np.vstack((start_row, t.rvs(df, loc, scale, size=(T, N))))
Plot the simulations
def get_plot_simulation(array, df) -> None:
    """Plot simulated paths: one figure per column of `df`, drawn from array[i].

    `array` is expected to hold one (T+1, N) simulation matrix per column.
    """
    for idx, name in enumerate(df.columns):
        plt.figure(figsize=(9, 6))
        plt.xlim(-0.05, array.shape[1] - 1)
        plt.title(name, size=18)
        plt.xlabel('time', size=14)
        plt.ylabel('price', size=14)
        plt.plot(array[idx])
        plt.show()
# risk_factors.columns
# Zero-coupon yield-curve tenors (in years) used as interest-rate factors.
interest_rates = risk_factors[['0.25', '0.5', '0.75',
                               '1', '2', '3', '5', '7',
                               '10', '15', '20', '30']]
ir_log = get_log(interest_rates)
# Tenors are strongly correlated, which motivates the PCA compression below.
sns.heatmap(ir_log.corr())
plt.title('Корреляция процентных ставок')
plt.show()
# Scan PCA sizes to see how many components the rate curve really needs.
for n_components in range(1, ir_log.shape[1] - 5):
    dim_red = PCA(n_components=n_components, random_state=42)
    dim_red.fit(ir_log)
    explained = np.sum(dim_red.explained_variance_ratio_)
    print(" {} component(s)\n explained variance: {}\n".format(n_components, explained))
1 component(s) explained variance: 0.6780849292951865 2 component(s) explained variance: 0.9088894837712045 3 component(s) explained variance: 0.9669646967686475 4 component(s) explained variance: 0.9874861187245191 5 component(s) explained variance: 0.9949896795651655 6 component(s) explained variance: 0.9976056959943999
# Keep 3 components (~96.7% of variance per the scan above) and simulate
# each decorrelated component independently.
PCA_ir = PCA(n_components=3, random_state=42)
ir_pca = pd.DataFrame(PCA_ir.fit_transform(ir_log),
                      columns=['ir_comp_1', 'ir_comp_2', 'ir_comp_3'])
sim_ir_vas = []
for col in ir_pca.columns:
    sim = gbm_sim_for_log_diff(ir_pca[col], T=10, N=100)
    sim_ir_vas.append(sim)
sim_ir_vas = np.array(sim_ir_vas)  # shape: (3 components, T+1, N)
get_plot_simulation(sim_ir_vas, ir_pca)
# Precious-metal factors: simulate each metal's log-returns directly (no PCA).
# The original cell assigned metal/metal_log twice verbatim; the duplicate
# statements have been removed (no behavioral effect).
metal = risk_factors[['Gold', 'Argent', 'Platine', 'Palladium']]
metal_log = get_log(metal)
sim_met = []
for col in metal_log.columns:
    # Default horizon/paths: T=10, N=1000.
    sim = gbm_sim_for_log_diff(metal_log[col])
    sim_met.append(sim)
sim_met = np.array(sim_met)
get_plot_simulation(sim_met, metal_log)
# Equity-index factors: simulate IMOEX and RTS log-returns.
idx_df = risk_factors[['IMOEX.ME', 'RTS']]
idx_df_log = get_log(idx_df)
sim_idx = []
for col in idx_df_log.columns:
    sim = gbm_sim_for_log_diff(idx_df_log[col], T=10, N=100)
    sim_idx.append(sim)
sim_idx = np.array(sim_idx)
# BUG FIX: the original made two calls, each with a single-column frame;
# get_plot_simulation enumerates columns from index 0, so both calls plotted
# sim_idx[0] and the "RTS" figure actually showed the IMOEX simulations.
# One call with the full frame plots each index against its own simulations.
get_plot_simulation(sim_idx, idx_df_log)
# Currency and oil risk factors: correlation check, then 2-component PCA.
cur_oil = risk_factors[['BZ=F', 'JPY', 'EUR', 'GBP']]
cur_oil_log = get_log(cur_oil)
sns.heatmap(cur_oil_log.corr())
plt.show()
pca_dim = PCA(n_components = 2, random_state=42)
cur_oil_pca = pd.DataFrame(pca_dim.fit_transform(cur_oil_log), columns=['cur_oil_comp_1', 'cur_oil_comp_2'])
print("Explained variance ratio: {}".format(np.sum(pca_dim.explained_variance_ratio_)))
Explained variance ratio: 0.9704124318007362
# Simulate each currency/oil principal component (defaults: T=10, N=1000).
sim_cur_oil = []
for col in cur_oil_pca.columns:
    sim = gbm_sim_for_log_diff(cur_oil_pca[col])
    sim_cur_oil.append(sim)
sim_cur_oil = np.array(sim_cur_oil)
get_plot_simulation(sim_cur_oil, cur_oil_pca)
# Re-fit a 2-component PCA per factor group (the single `pca` object is
# refitted each time) and print each group's cumulative explained variance.
pca = PCA(n_components = 2, random_state=42)
reduced_indexes = pca.fit_transform(idx_df_log)
print(np.sum(pca.explained_variance_ratio_))
reduced_curr_oil = pca.fit_transform(cur_oil_log)
print(np.sum(pca.explained_variance_ratio_))
reduced_rates = pca.fit_transform(ir_log)
print(np.sum(pca.explained_variance_ratio_))
reduced_metal = pca.fit_transform(metal_log)
print(np.sum(pca.explained_variance_ratio_))
1.0 0.9704124318007362 0.9088894837712045 0.8558776828045839
# Stack the two components of each group into one frame of 8 reduced factors.
reduced_data = pd.DataFrame(np.hstack([reduced_indexes, reduced_curr_oil, reduced_rates, reduced_metal]),
                            columns=['reduced_indexes_1', 'reduced_indexes_2',
                                     'reduced_curr_oil_1', 'reduced_curr_oil_2',
                                     'reduced_rates_1', 'reduced_rates_2',
                                     'reduced_metal_1', 'reduced_metal_2',],
                            index=idx_df_log.index)
# NOTE(review): inputs were already log-differenced, so .diff() here is a
# second difference — confirm this is intended.
reduced_data.diff().dropna().plot(figsize=(20, 8))
plt.show()
# Histogram of each reduced factor's difference with fitted normal and
# Student-t densities overlaid for a visual tail comparison.
fig, ax = plt.subplots(len(reduced_data.columns), 1, figsize=(8, 24))
for i, col in enumerate(reduced_data.columns):
    r = reduced_data[col].diff().dropna()
    params_norm = ss.norm.fit(r)
    params_t = ss.t.fit(r)
    vals, grid, _ = ax[i].hist(r, 50, density=True)
    ax[i].plot(grid, ss.norm(*params_norm).pdf(grid), label='norm')
    ax[i].plot(grid, ss.t(*params_t).pdf(grid),label='Student');
    ax[i].legend()
    ax[i].set_title(f'Распределение\nreduced_data[{col}].diff()')
plt.tight_layout()
# Coupon-schedule files per bond. BUG FIX: the original used backslash paths
# ('bonds\SU...'), which only work on Windows and merely happen to survive
# because '\S' is not a recognized string escape; forward slashes match the
# separator style used everywhere else in this file and work cross-platform.
obligation_coupons_files = ['bonds/SU26205RMFS3.csv', 'bonds/SU26209RMFS5.csv',
                            'bonds/SU26212RMFS9.csv', 'bonds/SU26215RMFS2.csv',
                            'bonds/SU26217RMFS8.csv']
def payment_per_day_bonds(obligation_coupons_files=obligation_coupons_files):
    """Return each bond's accrued coupon per calendar day, rounded to 5 d.p.

    For every schedule file, take the last completed coupon period (the
    'date' rows at -3 and -2), and divide that period's 'payment' by its
    length in days.
    """
    obligation_coupons = []
    for i in range(len(obligation_coupons_files)):
        obligation_coupon = pd.read_csv(obligation_coupons_files[i])
        obligation_coupon.date = pd.to_datetime(obligation_coupon.date)
        # Length of the last completed coupon period, in whole days.
        days = (obligation_coupon[['date']].iloc[-2] \
            - obligation_coupon[['date']].iloc[-3]).values[0].astype('timedelta64[D]').astype(int)
        payment_per_day = obligation_coupon['payment'].iloc[-2] / days
        obligation_coupons.append(payment_per_day)
    return np.round(obligation_coupons, 5)
def calculate_portfolio_distribution(bonds=bonds,
                                     shares=shares,
                                     currencies=currencies):
    """Build a daily-rebalanced portfolio over bonds, shares and currencies.

    Returns
    -------
    (number_of_assets, prices_of_assets, total_sum) : three DataFrames with
    the integer position sizes, the (dirty) prices and their product per day.
    """
    # "Dirty" bond prices: clean price plus the per-day accrued coupon.
    dirty_bonds = bonds + payment_per_day_bonds()
    df_for_portfolio = dirty_bonds.join((shares, currencies), how='left')
    # Initial allocation: 10M per bond, 1M per share, 10M per currency.
    position_0 = np.array([10e6] * 5 + [1e6] * 10 + [10e6] * 2)
    day_prices = df_for_portfolio.iloc[0]
    true_qty = position_0 / day_prices
    qty = np.round(position_0 / day_prices, 0).astype(int)
    # Target portfolio weights implied by the unrounded initial allocation.
    portfolio_qty_shares_true = (true_qty * day_prices) / (true_qty * day_prices).sum()
    all_qty = qty
    all_price = day_prices
    for t in range(1, df_for_portfolio.shape[0]):
        # NOTE(review): rebalancing uses the previous day's prices (t-1);
        # confirm whether same-day prices (iloc[t]) were intended.
        day_prices = df_for_portfolio.iloc[t-1]
        wts = (qty * day_prices) / np.sum(qty * day_prices)
        # Scale rounded quantities back toward the target weights.
        delta_wts = portfolio_qty_shares_true / wts
        qty = np.round(qty * delta_wts, 0).astype(int)
        all_qty = np.vstack((all_qty, qty))
        all_price = np.vstack((all_price, day_prices))
    number_of_assets = pd.DataFrame(all_qty, columns=df_for_portfolio.columns, index=dirty_bonds.index)
    prices_of_assets = pd.DataFrame(all_price, columns=df_for_portfolio.columns, index=dirty_bonds.index)
    total_sum = pd.DataFrame(all_qty*all_price, columns=df_for_portfolio.columns, index=dirty_bonds.index)
    return number_of_assets, prices_of_assets, total_sum
sum_portfolio
| SU26205RMFS3 | SU26209RMFS5 | SU26212RMFS9 | SU26215RMFS2 | SU26217RMFS8 | SBER | GAZP | YNDX | MGNT | GMKN | MTSS | PIKK | AFLT | POLY | RSTI | USD | CNY | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Date | |||||||||||||||||
| 2017-01-03 | 9.999995e+06 | 9.999985e+06 | 9.999966e+06 | 1.000005e+07 | 1.000001e+07 | 9.999548e+05 | 9.999424e+05 | 1.000246e+06 | 1.000197e+06 | 9.975757e+05 | 1.000027e+06 | 9.999310e+05 | 9.999374e+05 | 9.996933e+05 | 1.000000e+06 | 9.999989e+06 | 9.999999e+06 |
| 2017-01-04 | 9.999699e+06 | 9.999689e+06 | 9.999691e+06 | 9.999669e+06 | 9.999712e+06 | 9.999548e+05 | 9.999424e+05 | 1.000246e+06 | 1.000197e+06 | 9.975757e+05 | 1.000027e+06 | 9.999310e+05 | 9.999374e+05 | 9.996933e+05 | 9.999686e+05 | 9.999683e+06 | 9.999691e+06 |
| 2017-01-05 | 9.996329e+06 | 9.996336e+06 | 9.996366e+06 | 9.996339e+06 | 9.996370e+06 | 9.996598e+05 | 9.996056e+05 | 9.994400e+05 | 9.976027e+05 | 9.996008e+05 | 9.997099e+05 | 9.996513e+05 | 9.995957e+05 | 9.994833e+05 | 9.996360e+05 | 9.996368e+06 | 9.996351e+06 |
| 2017-01-06 | 9.973018e+06 | 9.973020e+06 | 9.973000e+06 | 9.973011e+06 | 9.972973e+06 | 9.973288e+05 | 9.972452e+05 | 9.978440e+05 | 1.000270e+06 | 9.964904e+05 | 9.972191e+05 | 9.971822e+05 | 9.973455e+05 | 9.973153e+05 | 9.972982e+05 | 9.972962e+06 | 9.972983e+06 |
| 2017-01-09 | 9.945517e+06 | 9.945516e+06 | 9.945545e+06 | 9.945546e+06 | 9.945548e+06 | 9.945684e+05 | 9.945077e+05 | 9.940700e+05 | 9.942148e+05 | 9.911925e+05 | 9.945873e+05 | 9.945176e+05 | 9.945809e+05 | 9.944850e+05 | 9.945544e+05 | 9.945543e+06 | 9.945542e+06 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2020-12-24 | 1.241331e+07 | 1.241331e+07 | 1.241329e+07 | 1.241336e+07 | 1.241331e+07 | 1.241220e+06 | 1.241396e+06 | 1.241244e+06 | 1.242241e+06 | 1.237020e+06 | 1.241449e+06 | 1.241170e+06 | 1.241309e+06 | 1.241219e+06 | 1.241334e+06 | 1.241335e+07 | 1.241334e+07 |
| 2020-12-25 | 1.236618e+07 | 1.236622e+07 | 1.236617e+07 | 1.236621e+07 | 1.236620e+07 | 1.236710e+06 | 1.236584e+06 | 1.235000e+06 | 1.238162e+06 | 1.231190e+06 | 1.236714e+06 | 1.236794e+06 | 1.236588e+06 | 1.237071e+06 | 1.236619e+06 | 1.236618e+07 | 1.236619e+07 |
| 2020-12-28 | 1.233878e+07 | 1.233874e+07 | 1.233875e+07 | 1.233874e+07 | 1.233876e+07 | 1.233851e+06 | 1.233787e+06 | 1.231712e+06 | 1.234243e+06 | 1.238186e+06 | 1.233846e+06 | 1.233874e+06 | 1.233875e+06 | 1.233646e+06 | 1.233878e+06 | 1.233880e+07 | 1.233877e+07 |
| 2020-12-29 | 1.235623e+07 | 1.235627e+07 | 1.235620e+07 | 1.235620e+07 | 1.235628e+07 | 1.235660e+06 | 1.235666e+06 | 1.233198e+06 | 1.233744e+06 | 1.229072e+06 | 1.235667e+06 | 1.235852e+06 | 1.235624e+06 | 1.235177e+06 | 1.235624e+06 | 1.235624e+07 | 1.235624e+07 |
| 2020-12-30 | 1.236769e+07 | 1.236769e+07 | 1.236769e+07 | 1.236771e+07 | 1.236771e+07 | 1.236787e+06 | 1.236771e+06 | 1.235927e+06 | 1.235290e+06 | 1.229176e+06 | 1.236845e+06 | 1.237062e+06 | 1.236784e+06 | 1.236571e+06 | 1.236768e+06 | 1.236768e+07 | 1.236768e+07 |
1008 rows × 17 columns
# Build the rebalanced portfolio series (quantities, prices, daily totals).
number_of_assets, prices_of_assets, sum_portfolio = calculate_portfolio_distribution()
Split by asset class
# Split quantities and prices into the three sub-portfolios
# (same column layout as `prices`: bonds | shares | currencies).
num_bonds = number_of_assets.iloc[:, 0:5]
num_shares = number_of_assets.iloc[:, 5:15]
num_currencies = number_of_assets.iloc[:, 15:17]
bonds_ = prices_of_assets.iloc[:, 0:5]
shares_ = prices_of_assets.iloc[:, 5:15]
currencies_ = prices_of_assets.iloc[:, 15:17]
# Risk factors assigned to each sub-portfolio.
rf_bonds = risk_factors[['IMOEX.ME', 'RTS', 'Gold', 'Argent', 'Platine', 'Palladium',
                         '0.25', '0.5', '0.75', '1', '2', '3', '5', '7', '10', '15', '20', '30']]
rf_shares = rf_bonds.copy()
rf_currencies = risk_factors[['BZ=F', 'JPY', 'EUR', 'GBP']]
num_bonds.plot(figsize=(16, 5))
plt.title('Number of Bonds during investments')
plt.show()
# RSTI is dropped from the plot (presumably for axis scale — TODO confirm).
num_shares.drop('RSTI', axis=1).plot(figsize=(16, 5))
plt.title('Number of Shares during investments')
plt.show()
num_currencies.plot(figsize=(16, 5))
plt.title('Number of Currencies during investments')
plt.show()
risk_factors
| IMOEX.ME | BZ=F | RTS | D_JPY | D_EUR | D_GBP | Gold | Argent | Platine | Palladium | ... | 0.75 | 1 | 2 | 3 | 5 | 7 | 10 | 15 | 20 | 30 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Date | |||||||||||||||||||||
| 2017-01-03 | 2285.429932 | 55.470001 | 1109.39 | 0.521610 | 64.056503 | 75.184998 | 2264.82 | 31.68 | 1869.86 | 1455.83 | ... | 8.43 | 8.40 | 8.25 | 8.21 | 8.22 | 8.28 | 8.42 | 8.66 | 8.85 | 9.09 |
| 2017-01-04 | 2263.899902 | 56.459999 | 1109.39 | 0.517870 | 63.111469 | 74.559998 | 2264.82 | 31.68 | 1869.86 | 1455.83 | ... | 8.43 | 8.39 | 8.18 | 8.14 | 8.19 | 8.29 | 8.44 | 8.69 | 8.88 | 9.12 |
| 2017-01-05 | 2220.350098 | 56.889999 | 1109.39 | 0.514930 | 62.432350 | 74.359001 | 2264.82 | 31.68 | 1869.86 | 1455.83 | ... | 8.42 | 8.37 | 8.17 | 8.13 | 8.16 | 8.24 | 8.37 | 8.63 | 8.84 | 9.09 |
| 2017-01-06 | 2213.929932 | 57.099998 | 1045.63 | 0.514980 | 62.352619 | 73.753998 | 2264.82 | 31.68 | 1869.86 | 1455.83 | ... | 8.36 | 8.35 | 8.24 | 8.17 | 8.15 | 8.21 | 8.35 | 8.60 | 8.80 | 9.06 |
| 2017-01-09 | 2211.250000 | 54.939999 | 1100.58 | 0.509430 | 62.233082 | 73.193001 | 2264.82 | 31.68 | 1869.86 | 1455.83 | ... | 8.56 | 8.49 | 8.23 | 8.16 | 8.11 | 8.12 | 8.21 | 8.43 | 8.63 | 8.90 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2020-12-24 | 3236.879883 | 51.200001 | 1378.33 | 0.566937 | 69.389999 | 81.236923 | 4529.59 | 62.52 | 2459.96 | 5667.14 | ... | 4.32 | 4.39 | 4.70 | 5.03 | 5.54 | 5.89 | 6.27 | 6.65 | 6.85 | 7.06 |
| 2020-12-25 | 3236.879883 | 51.200001 | 1378.36 | 0.566937 | 69.389999 | 81.236923 | 4505.61 | 60.91 | 2456.66 | 5601.48 | ... | 4.30 | 4.37 | 4.68 | 5.00 | 5.51 | 5.88 | 6.26 | 6.61 | 6.79 | 6.96 |
| 2020-12-28 | 3258.949951 | 50.860001 | 1391.31 | 0.566937 | 69.389999 | 81.236923 | 4505.61 | 60.91 | 2456.66 | 5601.48 | ... | 4.25 | 4.33 | 4.65 | 4.97 | 5.49 | 5.86 | 6.26 | 6.63 | 6.82 | 7.00 |
| 2020-12-29 | 3274.669922 | 51.090000 | 1398.48 | 0.566937 | 69.389999 | 81.236923 | 4438.08 | 61.10 | 2419.84 | 5517.53 | ... | 4.11 | 4.21 | 4.60 | 4.94 | 5.46 | 5.84 | 6.24 | 6.65 | 6.87 | 7.07 |
| 2020-12-30 | 3289.020020 | 51.340000 | 1387.46 | 0.566937 | 69.389999 | 81.236923 | 4437.61 | 61.05 | 2465.21 | 5612.44 | ... | 4.05 | 4.18 | 4.57 | 4.91 | 5.47 | 5.87 | 6.27 | 6.62 | 6.80 | 6.96 |
1008 rows × 22 columns
def get_VaR_ES(data, num_assets, risk_factors, num_sims, L_history = 150, horizon = 1):
    """Rolling Monte-Carlo VaR/ES per asset, aggregated to portfolio level.

    For each asset and each rolling window of L_history+1 log-returns:
    compress the window's risk factors with PCA (5 components when more
    than 4 factors, else 2), simulate each component `num_sims` times over
    `horizon` steps, map simulated factors to an asset-return prediction
    with a gradient-boosting regressor fitted on the window, then take the
    1% quantile as VaR and the mean return below the 2.5% quantile as ES.

    Parameters
    ----------
    data : DataFrame of asset prices.
    num_assets : DataFrame of position sizes (same columns as `data`).
    risk_factors : DataFrame of factor price series.
    num_sims : number of Monte-Carlo simulations per window.
    L_history : rolling estimation window length.
    horizon : simulation horizon in days.

    Returns
    -------
    (VaR, ES, VaR_per_asset, ES_per_asset) — portfolio series and per-asset
    frames, NaN over the warm-up window.
    """
    rr = get_log(data).dropna()
    risk_factors = get_log(risk_factors).dropna()
    # Daily portfolio weights implied by positions and prices.
    w = ((data*num_assets).T/(data*num_assets).sum(axis=1).values).T
    VaR, VaR_for_ES, ES = np.full(rr.shape, np.nan), np.full(rr.shape, np.nan), np.full(rr.shape, np.nan)
    for j, col in enumerate(rr):
        r = rr[col]
        for i in tqdm(range(L_history, len(r)-1)):
            history = r[i - L_history: i+1]
            # PCA-compress the factor window; component count depends on
            # how many raw factors are available.
            if risk_factors.shape[1] > 4:
                dim_red = PCA(n_components = 5, random_state=42)
                risk_factors_ = risk_factors.iloc[i - L_history: i+1]
                risk_factors_ = pd.DataFrame(dim_red.fit_transform(risk_factors_),
                                             columns=['ir_comp_1', 'ir_comp_2', 'ir_comp_3', 'ir_comp_4', 'ir_comp_5'])
            else:
                dim_red = PCA(n_components = 2, random_state=42)
                risk_factors_ = risk_factors.iloc[i - L_history: i+1]
                risk_factors_ = pd.DataFrame(dim_red.fit_transform(risk_factors_),
                                             columns=['ir_comp_1', 'ir_comp_2'])
            paths_dict = dict()
            # Keep only the terminal simulated value of each component path.
            # NOTE(review): this inner `col` shadows the outer loop's `col`;
            # harmless because `r` is already bound, but worth renaming.
            for col in risk_factors_.columns:
                paths_dict[col] = gbm_sim_for_log_diff(risk_factors_[col], T=horizon, N=num_sims)[-1]
            sims = pd.DataFrame(np.array(list(paths_dict.values())).squeeze().T,
                                index=range(num_sims), columns = risk_factors_.columns)
            # Learn the window's factor -> asset-return mapping, then apply
            # it to the simulated factor values.
            GB = GradientBoostingRegressor(random_state=42)
            GB.fit(risk_factors_, history)
            pred = GB.predict(sims)
            VaR[i, j] = np.quantile(pred, 0.01)
            VaR_for_ES[i, j] = np.quantile(pred, 0.025)
            # ES: mean historical return below the 2.5% simulated quantile.
            ES[i, j] = history[history <= VaR_for_ES[i, j]].mean()
    VaR_per_asset = pd.DataFrame(data=VaR, index=rr.index)
    ES_per_asset = pd.DataFrame(data=ES, index=rr.index)
    VaR_per_asset.columns = w.columns
    ES_per_asset.columns = w.columns
    # Aggregate per-asset measures to portfolio level with the daily weights.
    VaR = pd.Series(data=(VaR_per_asset * w).sum(axis=1), index=rr.index)
    ES = pd.Series(data=(ES_per_asset * w).sum(axis=1), index=rr.index)
    return VaR, ES, VaR_per_asset, ES_per_asset
def plot_VaR_ES(data, VaR, ES, num_assets, L_history=180, plot_title=''):
    """Plot weighted daily log returns together with their VaR and ES curves.

    VaR / ES are the per-asset frames returned by get_VaR_ES; they are
    re-weighted here with the same position-implied weights as the returns.
    """
    rr = get_log(data).dropna()
    # NOTE(review): [:-1] trims the weights to the same length as the
    # returns (which lose one row to diff) — confirm the date alignment.
    w = ((data*num_assets).T/(data*num_assets).sum(axis=1).values).T[:-1]
    VaR.columns = w.columns
    ES.columns = w.columns
    plt.figure(figsize=(16, 8))
    plt.plot(pd.DataFrame((rr * w).sum(axis=1)), alpha=0.4, label='Daily Weighted Log Returns')
    # Skip the warm-up window where VaR/ES are NaN.
    plt.plot(pd.DataFrame((VaR * w).sum(axis=1)[L_history+1:]), color='red', label='VaR')
    plt.plot(pd.DataFrame((ES * w).sum(axis=1)[L_history+1:]), color='blue', label='ES')
    plt.title(plot_title)
    plt.legend()
    plt.show()
# 1-day horizon: bonds sub-portfolio
bonds_VaR_1, bonds_ES_1, bonds_VaR_per_1, bonds_ES_per_1 \
    = get_VaR_ES(bonds_, num_bonds, rf_bonds, 200, L_history = 180, horizon=1)
plot_VaR_ES(bonds_, bonds_VaR_per_1, bonds_ES_per_1, num_bonds, L_history=180,
            plot_title ='Log Bonds daily returns, VaR and ES curves, horizon - 1')
# 10-day horizon: bonds sub-portfolio
bonds_VaR_10, bonds_ES_10, bonds_VaR_per_10, bonds_ES_per_10 \
    = get_VaR_ES(bonds_, num_bonds, rf_bonds, 200, L_history = 180, horizon=10)
plot_VaR_ES(bonds_, bonds_VaR_per_10, bonds_ES_per_10, num_bonds, L_history=180,
            plot_title ='Log Bonds daily returns, VaR and ES curves, horizon - 10')
# 1-day horizon: shares sub-portfolio
shares_VaR_1, shares_ES_1, shares_VaR_per_1, shares_ES_per_1 \
    = get_VaR_ES(shares_, num_shares, rf_shares, 200, L_history = 180, horizon=1)
plot_VaR_ES(shares_, shares_VaR_per_1, shares_ES_per_1, num_shares, L_history=180,
            plot_title ='Log Shares daily returns, VaR and ES curves, horizon - 1')
# 10-day horizon: shares sub-portfolio
shares_VaR_10, shares_ES_10, shares_VaR_per_10, shares_ES_per_10 \
    = get_VaR_ES(shares_, num_shares, rf_shares, 200, L_history = 180, horizon = 10 )
plot_VaR_ES(shares_, shares_VaR_per_10, shares_ES_per_10, num_shares, L_history=180,
            plot_title ='Log Shares daily returns, VaR and ES curves, horizon - 10')
# 1-day horizon: currencies sub-portfolio
currencies_VaR_1, currencies_ES_1, currencies_VaR_per_1, currencies_ES_per_1 \
    = get_VaR_ES(currencies_, num_currencies, rf_currencies, 200, L_history = 180, horizon=1)
plot_VaR_ES(currencies_, currencies_VaR_per_1, currencies_ES_per_1, num_currencies, L_history=180,
            plot_title ='Log Currencies daily returns, VaR and ES curves, horizon - 1')
# 10-day horizon: currencies sub-portfolio
currencies_VaR_10, currencies_ES_10, currencies_VaR_per_10, currencies_ES_per_10 \
    = get_VaR_ES(currencies_, num_currencies, rf_currencies, 200, L_history = 180, horizon=10)
plot_VaR_ES(currencies_, currencies_VaR_per_10, currencies_ES_per_10, num_currencies, L_history=180,
            plot_title ='Log Currencies daily returns, VaR and ES curves, horizon - 10')
# 1-day horizon: whole portfolio.
portfolio_VaR_1, portfolio_ES_1, portfolio_VaR_per_1, portfolio_ES_per_1 \
    = get_VaR_ES(prices_of_assets, number_of_assets, risk_factors, 200, L_history = 180, horizon=1)
# BUG FIX: the titles said "Log Shares" (copy-paste from the shares cell)
# although these plots cover the whole portfolio.
plot_VaR_ES(prices_of_assets, portfolio_VaR_per_1, portfolio_ES_per_1, number_of_assets, L_history=180,
            plot_title='Log Portfolio daily returns, VaR and ES curves, horizon - 1')
# 10-day horizon: whole portfolio.
portfolio_VaR_10, portfolio_ES_10, portfolio_VaR_per_10, portfolio_ES_per_10 \
    = get_VaR_ES(prices_of_assets, number_of_assets, risk_factors, 200, L_history = 180, horizon=10)
plot_VaR_ES(prices_of_assets, portfolio_VaR_per_10, portfolio_ES_per_10, number_of_assets, L_history=180,
            plot_title='Log Portfolio daily returns, VaR and ES curves, horizon - 10')
def calc_VaR_and_ES(r, VaR_fun, L_history=170, level_VaR=0.01, level_ES=0.025, horizon=1):
    """Rolling VaR and ES curves for an equally weighted portfolio of `r`'s columns.

    Parameters
    ----------
    r : pd.DataFrame
        Asset returns, one column per asset.
    VaR_fun : callable
        VaR estimator with signature ``(returns, alpha, horizon) -> float``.
    L_history : int
        Length of the rolling estimation window (observations).
    level_VaR : float
        Tail probability for the reported VaR curve.
    level_ES : float
        Tail probability of the VaR threshold used to average losses for ES.
    horizon : int
        Forecast horizon forwarded to `VaR_fun`.

    Returns
    -------
    (pd.Series, pd.Series)
        VaR and ES curves indexed like `r` (NaN during the warm-up window).
    """
    n_obs, n_assets = r.shape
    VaR = np.full(n_obs, np.nan)
    VaR_for_ES = np.full(n_obs, np.nan)
    ES = np.full(n_obs, np.nan)
    weights = np.full(n_assets, 1.0 / n_assets)  # equal-weight portfolio
    for i in tqdm(range(L_history, n_obs)):
        history = r[i - L_history: i]
        h = np.dot(history, weights)
        VaR[i] = VaR_fun(h, level_VaR, horizon)
        # Guard against occasional blown-up GARCH fits: fall back to a recent value.
        if VaR[i] < -0.25:
            VaR[i] = VaR[i - 3]
        VaR_for_ES[i] = VaR_fun(h, level_ES, horizon)
        if VaR_for_ES[i] < -0.25:
            # BUG FIX: the original overwrote VaR[i] here, leaving the blown-up
            # ES threshold in place; it is VaR_for_ES that must fall back.
            VaR_for_ES[i] = VaR_for_ES[i - 3]
        ES[i] = h[h < VaR_for_ES[i]].mean()
    VaR = pd.Series(data=VaR, index=r.index, name=VaR_fun.__name__)
    ES = pd.Series(data=ES, index=r.index, name=VaR_fun.__name__)
    return VaR, ES
def calculate_VaR_HS(ret, alpha=0.01, horizon=1):
    """Historical-simulation VaR: the empirical alpha-quantile of `ret`.

    `horizon` is accepted only for interface compatibility with the other
    VaR estimators; historical simulation does not use it.
    """
    return np.quantile(a=ret, q=alpha)
def calculate_VaR_garch(returns, alpha, h=1):
    """GARCH-based VaR forecast at tail probability `alpha`.

    Fits a constant-mean GJR-GARCH(1,1,1) with skew-t innovations on a
    rescaled copy of `returns`, forecasts mean and variance `h` steps ahead
    (averaged over the horizon) and maps the skew-t alpha-quantile back to
    the original return scale.
    """
    # arch is numerically happier when the series is scaled to ~unit magnitude
    scale = 10 / returns.std()
    model = arch_model(returns * scale,
                       mean='Constant', lags=1,
                       vol='GARCH', p=1, o=1,
                       q=1, dist='skewt')
    fitted = model.fit(disp='off', show_warning=False)
    fc = fitted.forecast(horizon=h)
    mu = float(fc.mean.iloc[-1].mean())
    sigma2 = float(fc.variance.iloc[-1].mean())
    # the last two fitted parameters are the skew-t shape parameters
    quantile = model.distribution.ppf(alpha, fitted.params[-2:])
    return (mu + np.sqrt(sigma2) * quantile) / scale
def plot_hist_GARCH(data, data_name, VaR_level=0.01, ES_level=0.025, horizon=(1, 10)):
    """Plot return histograms with GARCH VaR and ES markers, one subplot per horizon.

    Fits a constant-mean GJR-GARCH(1,1,1) with skew-t errors to the equally
    weighted portfolio return of `data`; for each horizon, draws the histogram
    split at the VaR level plus vertical VaR (solid) and ES (dashed) lines.

    Parameters
    ----------
    data : pd.DataFrame
        Asset returns, one column per asset.
    data_name : str
        Label used in the plot titles.
    VaR_level : float
        Tail probability for VaR.
    ES_level : float
        Tail probability of the threshold used to average losses for ES.
    horizon : tuple of int
        Forecast horizons, one subplot each.
    """
    fig = plt.figure(figsize=(16, 5 * len(horizon)))
    ret = data.mean(axis=1)  # equal-weight portfolio return
    # arch is numerically happier when the series is scaled to ~unit magnitude
    scaling_const = 10 / ret.std()
    mdl = arch_model(y=ret * scaling_const,
                     mean='Constant', lags=1,
                     vol='GARCH', p=1, o=1, q=1,
                     dist='skewt')
    res = mdl.fit(disp='off')
    for i, h in enumerate(horizon):
        forecasts = res.forecast(horizon=h)
        cond_mean = float(forecasts.mean.iloc[-1].mean())
        cond_var = float(forecasts.variance.iloc[-1].mean())
        # res.params[-2:] are the skew-t shape parameters
        q_var = mdl.distribution.ppf(VaR_level, res.params[-2:])
        VaR = (cond_mean + np.sqrt(cond_var) * q_var) / scaling_const
        # BUG FIX: the ES threshold quantile must use ES_level; the original
        # passed VaR_level, so the dashed ES line contradicted its legend label.
        q_es = mdl.distribution.ppf(ES_level, res.params[-2:])
        VaR_for_ES = (cond_mean + np.sqrt(cond_var) * q_es) / scaling_const
        ES_garch = ret[ret < VaR_for_ES].mean()
        plt.subplot(len(horizon), 1, i + 1)
        sns.distplot(ret[ret >= VaR], hist=True, kde=False,
                     bins=40,
                     hist_kws={'edgecolor': 'black'},
                     kde_kws={'linewidth': 4})
        sns.distplot(ret[ret < VaR], hist=True, kde=False,
                     bins=40,
                     hist_kws={'edgecolor': 'black'},
                     kde_kws={'linewidth': 4})
        plt.axvline(VaR, linewidth=4, color="r")
        plt.axvline(ES_garch, linewidth=4, color='r', linestyle='dashed')
        plt.title("Histogram of {} daily returns, horizon - {}".format(data_name, h), weight="bold")
        plt.legend(['GARCH VaR for alpha={}%: {:.2f}%'.format(VaR_level, 100*VaR),
                    'GARCH ES for alpha={}%: {:.2f}%'.format(ES_level, 100*ES_garch),
                    'Historical Returns Distribution',
                    'Returns < VaR'], fontsize=12)
# Daily simple returns for every asset class
bonds_ret = bonds_.pct_change()
shares_ret = shares_.pct_change()
currencies_ret = currencies_.pct_change()

# Rolling GARCH VaR / ES for bonds at the 1- and 10-day horizons
VaR_bonds_1, ES_bonds_1 = calc_VaR_and_ES(bonds_ret, calculate_VaR_garch, L_history=170,
                                          level_VaR=0.01, level_ES=0.025, horizon=1)
VaR_bonds_10, ES_bonds_10 = calc_VaR_and_ES(bonds_ret, calculate_VaR_garch, L_history=170,
                                            level_VaR=0.01, level_ES=0.025, horizon=10)

fig = plt.figure(figsize=(16, 10))
panels = (
    (211, VaR_bonds_1, ES_bonds_1, "Bonds daily returns, VaR and ES curves, horizon - 1"),
    (212, VaR_bonds_10, ES_bonds_10, "Bonds daily returns, VaR and ES curves, horizon - 10"),
)
for panel, var_curve, es_curve, title in panels:
    plt.subplot(panel)
    plt.plot(bonds_ret)
    var_curve.plot(label='VaR via GARCH at alpha - 0.01')
    es_curve.plot(label='ES via GARCH at alpha - 0.025')
    plt.title(title, weight="bold")
    plt.legend()
plt.tight_layout()
plt.show()
plot_hist_GARCH(bonds_ret, 'bonds')
# [pasted notebook output] E:\Anaconda\lib\site-packages\arch\univariate\base.py:708: ConvergenceWarning: The optimizer returned code 4. The message is: Inequality constraints incompatible See scipy.optimize.fmin_slsqp for code meaning. warnings.warn(
# Rolling GARCH VaR / ES for shares at the 1- and 10-day horizons
VaR_shares_1, ES_shares_1 = calc_VaR_and_ES(shares_ret, calculate_VaR_garch, L_history=170,
                                            level_VaR=0.01, level_ES=0.025, horizon=1)
VaR_shares_10, ES_shares_10 = calc_VaR_and_ES(shares_ret, calculate_VaR_garch, L_history=170,
                                              level_VaR=0.01, level_ES=0.025, horizon=10)

fig = plt.figure(figsize=(16, 10))
panels = (
    (211, VaR_shares_1, ES_shares_1, "Shares daily returns, VaR and ES curves, horizon - 1"),
    (212, VaR_shares_10, ES_shares_10, "Shares daily returns, VaR and ES curves, horizon - 10"),
)
for panel, var_curve, es_curve, title in panels:
    plt.subplot(panel)
    plt.plot(shares_ret)
    var_curve.plot(label='VaR via GARCH at alpha - 0.01')
    es_curve.plot(label='ES via GARCH at alpha - 0.025')
    plt.title(title, weight="bold")
    plt.legend()
plt.tight_layout()
plt.show()
plot_hist_GARCH(shares_ret, 'shares')
# [pasted notebook output] E:\Anaconda\lib\site-packages\arch\univariate\base.py:708: ConvergenceWarning: The optimizer returned code 4. The message is: Inequality constraints incompatible See scipy.optimize.fmin_slsqp for code meaning. warnings.warn(
# Rolling GARCH VaR / ES for currencies at the 1- and 10-day horizons
VaR_currencies_1, ES_currencies_1 = calc_VaR_and_ES(currencies_ret, calculate_VaR_garch, L_history=170,
                                                    level_VaR=0.01, level_ES=0.025, horizon=1)
VaR_currencies_10, ES_currencies_10 = calc_VaR_and_ES(currencies_ret, calculate_VaR_garch, L_history=170,
                                                      level_VaR=0.01, level_ES=0.025, horizon=10)

fig = plt.figure(figsize=(16, 10))
panels = (
    (211, VaR_currencies_1, ES_currencies_1, "Currencies daily returns, VaR and ES curves, horizon - 1"),
    (212, VaR_currencies_10, ES_currencies_10, "Currencies daily returns, VaR and ES curves, horizon - 10"),
)
for panel, var_curve, es_curve, title in panels:
    plt.subplot(panel)
    plt.plot(currencies_ret)
    var_curve.plot(label='VaR via GARCH at alpha - 0.01')
    es_curve.plot(label='ES via GARCH at alpha - 0.025')
    plt.title(title, weight="bold")
    plt.legend()
plt.tight_layout()
plt.show()
plot_hist_GARCH(currencies_ret, 'currencies')
# [pasted notebook output] E:\Anaconda\lib\site-packages\arch\univariate\base.py:708: ConvergenceWarning: The optimizer returned code 4. The message is: Inequality constraints incompatible See scipy.optimize.fmin_slsqp for code meaning. warnings.warn(
def bern_test(p, v):
    """Kupiec unconditional-coverage LR statistic for a VaR violation series.

    Parameters
    ----------
    p : float
        Target violation probability (the VaR level), 0 < p < 1.
    v : sequence of 0/1 (list, ndarray or pd.Series)
        Indicator series of VaR violations.

    Returns
    -------
    float
        Likelihood-ratio statistic, asymptotically chi2(1) under H0.
    """
    v = np.asarray(v, dtype=float)
    lv = len(v)
    sv = v.sum()

    def _xlogy(n, q):
        # n * log(q) with the convention 0 * log(0) == 0.
        # BUG FIX: the original returned NaN for series with zero (or all)
        # violations because np.log(0) * 0 evaluates to NaN.
        return n * np.log(q) if n > 0 else 0.0

    al = _xlogy(sv, p) + _xlogy(lv - sv, 1 - p)
    bl = _xlogy(sv, sv / lv) + _xlogy(lv - sv, 1 - sv / lv)
    return -2 * (al - bl)
def ind_test(V):
J = np.full([len(V),4], 0)
for i in range(1,len(V)-1):
J[i,0] = (V[i-1] == 0) & (V[i] == 0)
J[i,1] = (V[i-1] == 0) & (V[i] == 1)
J[i,2] = (V[i-1] == 1) & (V[i] == 0)
J[i,3] = (V[i-1] == 1) & (V[i] == 1)
V_00 = sum(J[:,0])
V_01 = sum(J[:,1])
V_10 = sum(J[:,2])
V_11 = sum(J[:,3])
p_00=V_00/(V_00+V_01)
p_01=V_01/(V_00+V_01)
p_10=V_10/(V_10+V_11)
p_11=V_11/(V_10+V_11)
hat_p = (V_01+V_11)/(V_00+V_01+V_10+V_11)
al = np.log(1-hat_p)*(V_00+V_10) + np.log(hat_p)*(V_01+V_11)
bl = np.log(p_00)*V_00 + np.log(p_01)*V_01 + np.log(p_10)*V_10 + np.log(p_11)*V_11
return (-2*(al-bl))
def backtest_results(ret, var_curve, alpha, significance=0.95):
    """Backtest a VaR curve against realised returns.

    Reports the observed violation rate and runs Kupiec's coverage test
    (bern_test) and Christoffersen's independence test (ind_test), comparing
    each statistic with the chi2(1) critical value at `significance`.
    """
    mask = var_curve.notna()
    breaches = ret[mask] < var_curve[mask]
    critical = ss.chi2.ppf(significance, 1)
    print('Target share of violations: {:.2f}%'.format(100 * alpha))
    print('Observed share of violations: {:.2f}%'.format(100 * breaches.mean()))
    print('')
    print('Test for coverage is passed'
          if bern_test(p=alpha, v=breaches) < critical
          else 'Test for coverage isn\'t passed')
    print('')
    print('Test for independence is passed'
          if ind_test(breaches) < critical
          else 'Test for independence isn\'t passed')
# Run the VaR backtests for every (asset class, horizon) pair.
backtest_data = [bonds_, bonds_, shares_, shares_,
                 currencies_, currencies_, prices_of_assets, prices_of_assets]
# BUG FIX: the 10-day currency slot repeated currencies_VaR_1 (the pasted
# output showed identical 16.58% violation rates twice); use the 10-day curve.
backtest_data_VaRs = [bonds_VaR_1, bonds_VaR_10, shares_VaR_1, shares_VaR_10,
                      currencies_VaR_1, currencies_VaR_10, portfolio_VaR_1, portfolio_VaR_10]
for data, var_curve in zip(backtest_data, backtest_data_VaRs):
    backtest_results(
        get_log(data).mean(axis=1),
        var_curve=var_curve,
        alpha=0.01,
        significance=0.95
    )
    print('-' * 50)
# [pasted notebook output] Target share of violations: 1.00% Observed share of violations: 11.12% Test for coverage isn't passed Test for independence isn't passed -------------------------------------------------- Target share of violations: 1.00% Observed share of violations: 11.02% Test for coverage isn't passed Test for independence isn't passed -------------------------------------------------- Target share of violations: 1.00% Observed share of violations: 8.84% Test for coverage isn't passed Test for independence isn't passed -------------------------------------------------- Target share of violations: 1.00% Observed share of violations: 8.74% Test for coverage isn't passed Test for independence isn't passed -------------------------------------------------- Target share of violations: 1.00% Observed share of violations: 16.58% Test for coverage isn't passed Test for independence isn't passed -------------------------------------------------- Target share of violations: 1.00% Observed share of violations: 16.58% Test for coverage isn't passed Test for independence isn't passed -------------------------------------------------- Target share of violations: 1.00% Observed share of violations: 14.20% Test for coverage isn't passed Test for independence isn't passed -------------------------------------------------- Target share of violations: 1.00% Observed share of violations: 14.20% Test for coverage isn't passed Test for independence isn't passed --------------------------------------------------